Load R packages

library(tidyverse) ; library(reshape2) ; library(glue) ; library(plotly) ; library(plotlyutils)
library(RColorBrewer) ; library(viridis) ; require(gridExtra) ; library(GGally) ; library(ggpubr)
library(Rtsne)
library(rstatix)
library(expss)
library(ClusterR)
library(DESeq2) ; library(biomaRt)
library(knitr)

Load preprocessed dataset (code in 2.2.Preprocessing_pipeline.Rmd)

# Load Gandal dataset
load('./../Data/preprocessedData/preprocessed_data.RData')
datExpr = datExpr %>% data.frame
datMeta = datMeta %>% dplyr::rename(Batch = RNAExtractionBatch)




2.3.1 Exploratory analysis


Distribution of sample features by diagnosis


Diagnosis and brain region seem to be balanced

table_info = datMeta %>% apply_labels(Diagnosis = 'Diagnosis', Brain_lobe = 'Brain Lobe', 
                                      Batch = 'Batch', Sex = 'Gender')

cro(table_info$Diagnosis, list(table_info$Brain_lobe,total()))
 Brain Lobe     #Total 
 Frontal   Occipital   
 Diagnosis 
   CTL  19 30   49
   ASD  16 24   40
   #Total cases  35 54   89


There are many more Male than Female samples, and there seems to be a small correlation between gender and diagnosis, with an underepresentation of male ASD samples

cro(table_info$Diagnosis, list(table_info$Sex, total()))
 Gender     #Total 
 F   M   
 Diagnosis 
   CTL  10 39   49
   ASD  11 29   40
   #Total cases  21 68   89


The difference in age between diagnosis groups is not statistically significant, the only difference seems to be that ASD samples have a larger age dispersion

datMeta %>% ggplot(aes(Diagnosis, Age, fill = Diagnosis)) + geom_boxplot() + 
            geom_jitter(color='gray', size=2, alpha = 0.5) +
            stat_compare_means(label = 'p.signif', method = 't.test', method.args = list(var.equal = FALSE)) + 
            theme_minimal() + theme(legend.position = 'none')



Visualisations of the samples


PCA plots


The first principal component separates the samples by diagnosis groups quite well, there is no recognisable pattern in the samples coloured by brain lobe

pca = datExpr %>% t %>% prcomp

plot_data = pca$x %>% data.frame %>% dplyr::select(PC1:PC2) %>% 
            mutate('Sample_ID'= datExpr %>% colnames) %>% 
            left_join(datMeta, by='Sample_ID') %>% 
            dplyr::select('Sample_ID','PC1','PC2','Diagnosis','Brain_lobe') %>% 
            mutate('Diagnosis' = factor(Diagnosis, levels=c('CTL','ASD')))

p1 = plot_data %>% ggplot(aes(PC1, PC2, color=Diagnosis, shape=Diagnosis)) + geom_point(alpha = 0.8) +
              xlab(paste0('PC1 (',round(100*summary(pca)$importance[2,1],1),'%)')) +
              ylab(paste0('PC2 (',round(100*summary(pca)$importance[2,2],1),'%)')) +
              #ggtitle('Samples coloured by Diagnosis') + 
  coord_fixed() + theme_minimal() + theme(legend.position = 'bottom')

p2 = plot_data %>% ggplot(aes(PC1, PC2, color=Brain_lobe, shape=Brain_lobe)) + geom_point(alpha = 0.8) +
              xlab(paste0('PC1 (',round(100*summary(pca)$importance[2,1],1),'%)')) +
              ylab(paste0('PC2 (',round(100*summary(pca)$importance[2,2],1),'%)')) +
              #ggtitle('Samples coloured by Brain Lobe') + 
  coord_fixed() + labs(color = 'Brain Lobe', shape = 'Brain Lobe') + 
              theme_minimal() + theme(legend.position = 'bottom')

grid.arrange(p1, p2, nrow = 1)

rm(pca, p1, p2)


t-SNE plots


T-SNE seems to be struggling to separate the samples by Diagnosis

perplexities = c(2,5,10,20)
ps = list()

for(i in 1:length(perplexities)){
  set.seed(123)
  tsne = datExpr %>% t %>% Rtsne(perplexity=perplexities[i])
  plot_data = data.frame('ID'=colnames(datExpr), 'C1'=tsne$Y[,1], 'C2'=tsne$Y[,2]) %>%
              left_join(datMeta, by=c('ID'='Sample_ID')) %>%
              dplyr::select('C1','C2','Diagnosis','Subject_ID') %>%
              mutate('Diagnosis'=factor(Diagnosis, levels=c('CTL','ASD')))
  ps[[i]] = plot_data %>% ggplot(aes(C1, C2, color=Diagnosis, shape=Diagnosis)) + geom_point(alpha = 0.7) + theme_minimal() +
            ggtitle(paste0('Perplexity = ',perplexities[i])) + theme(legend.position='none') + coord_fixed()
}

grid.arrange(grobs=ps, nrow=2)

This time, there does seem to be some small local clusters related to brain lobe

perplexities = c(2,5,10,20)
ps = list()

for(i in 1:length(perplexities)){
  set.seed(123)
  tsne = datExpr %>% t %>% Rtsne(perplexity=perplexities[i])
  plot_data = data.frame('ID'=colnames(datExpr), 'C1'=tsne$Y[,1], 'C2'=tsne$Y[,2]) %>%
              left_join(datMeta, by=c('ID'='Sample_ID')) %>%
              dplyr::select('C1','C2','Brain_lobe','Subject_ID')
  ps[[i]] = plot_data %>% ggplot(aes(C1, C2, color=Brain_lobe, shape=Brain_lobe)) + geom_point(alpha = 0.7) + theme_minimal() +
            ggtitle(paste0('Perplexity = ',perplexities[i])) + theme(legend.position='none') + coord_fixed()
}

grid.arrange(grobs=ps, nrow=2)


In the Gandal dataset, some perplexity values managed to capture the subject the samples belonged to, but it doesn’t seem to do it with this new dataset

perplexities = c(2,5,10,20)
ps = list()

for(i in 1:length(perplexities)){
  set.seed(123)
  tsne = datExpr %>% t %>% Rtsne(perplexity=perplexities[i])
  plot_data = data.frame('ID'=colnames(datExpr), 'C1'=tsne$Y[,1], 'C2'=tsne$Y[,2]) %>%
              left_join(datMeta, by=c('ID'='Sample_ID')) %>%
              dplyr::select('C1','C2','Brain_lobe','Subject_ID')
  ps[[i]] = plot_data %>% ggplot(aes(C1, C2, color=Subject_ID)) + geom_point(alpha = 0.6) + theme_minimal() +
            ggtitle(paste0('Perplexity = ',perplexities[i])) + theme(legend.position='none') + coord_fixed()
}

grid.arrange(grobs=ps, nrow=2)


Interactive version of the last plot above so we can corroborate the subject ID’s from each sample (because having so many different subject IDs, subjects assigned similar colours may be confused)

ggplotly(plot_data %>% ggplot(aes(C1, C2, color=Subject_ID)) + geom_point(aes(id=Subject_ID)) + theme_minimal() + 
         theme(legend.position='none') + ggtitle('t-SNE Perplexity = 20 coloured by Subject ID'))



Visualisations of the genes


PCA Plot


The first principal component explains over 99% of the variance in the dataset and is strongly related to the mean level of expression of the genes

pca = datExpr %>% prcomp

plot_data = data.frame( 'PC1' = pca$x[,1], 'PC2' = pca$x[,2], 'MeanExpr'=rowMeans(datExpr))

plot_data %>% ggplot(aes(PC1, PC2, color=MeanExpr)) + geom_point(alpha=0.3) + theme_minimal() + 
              scale_color_viridis() + ggtitle('PCA') + coord_fixed() +
              xlab(paste0('PC1 (',round(100*summary(pca)$importance[2,1],1),'%)')) +
              ylab(paste0('PC2 (',round(100*summary(pca)$importance[2,2],1),'%)')) + 
              theme(legend.position = 'bottom', legend.key.width=unit(3, 'cm')) + labs(color='Mean Expression ')


t-SNE


Independently of the perplexity parameter, the mean level of expression of the genes seems to continue to be the main feature that characterises the genes

perplexities = c(1,2,5,10,50,100)
ps = list()

for(i in 1:length(perplexities)){
  
  file_name = paste0('./../Data/Results/tsne_perplexity_',perplexities[i],'.csv')
  
  # Check if file with t-sne coordinates already exists, if not, calculate coordinates
  if(file.exists(file_name)){
    tsne = read.csv(file_name)
  } else {
    set.seed(123)
    tsne = datExpr %>% Rtsne(perplexity=perplexities[i])
    tsne_coords = cbind(tsne$Y, rownames(datExpr))
    colnames(tsne_coords) = c('C1','C2','ID')
    write.csv(tsne_coords, file_name, row.names=F)
    tsne = tsne_coords
  }
  
  # Plot results
  plot_data = data.frame('C1'=tsne[,1], 'C2'=tsne[,2], 'MeanExpr'=rowMeans(datExpr))
  ps[[i]] = plot_data %>% ggplot(aes(C1, C2, color=MeanExpr)) + geom_point(alpha=0.5) + theme_minimal() + coord_fixed() +
            scale_color_viridis() + ggtitle(paste0('Perplexity = ', perplexities[i])) + theme(legend.position='none')
}

grid.arrange(grobs=ps, nrow=2)



Mean level of expression


Samples belonging to the ASD group have statistically significantly higher levels of expression than the Control group (this was unexpected and consistent with the results in the Gandal dataset)

plot_data = data.frame('ID'=colnames(datExpr), 'Mean'=colMeans(datExpr)) %>% 
            left_join(datMeta, by=c('ID'='Sample_ID'))

plot_data %>% ggplot(aes(Diagnosis, Mean, fill = Diagnosis)) + 
              geom_boxplot(outlier.colour='gray', outlier.shape='o', outlier.size=3) + 
              stat_compare_means(label = 'p.signif', method = 't.test', method.args = list(var.equal = FALSE)) + 
              ylab('Mean Expression') + theme_minimal() + theme(legend.position = 'none')


Samples extracted from the frontal lobe have a slightly lower mean level of expression than the samples from the other lobes, but this difference is not statistically significant (this is consistent with the Gandal dataset)

plot_data %>% ggplot(aes(Brain_lobe, Mean)) + 
              geom_boxplot(outlier.colour='gray', outlier.shape='o', outlier.size=3, aes(fill = Brain_lobe)) + 
              stat_compare_means(label = 'p.signif', method = 't.test', method.args = list(var.equal = FALSE)) + 
              xlab('Brain Lobe') + ylab('Mean Expression') + theme(legend.position = 'none') + 
              theme_minimal() + theme(legend.position = 'none')




2.3.2 Differential Expression Analysis


Log fold change threshold


There are 460 DE genes using a threshold of LFC=0 (~4% of the total number of genes). As the threshold increases, the number of DE genes quickly decreases.

lfc_list = seq(0, 0.2, 0.01)

n_genes = nrow(datExpr)

# PCA
datExpr_pca_samps = datExpr %>% data.frame %>% t %>% prcomp

# Initialise data frame to save PCA outputs
pcas_samps = datExpr_pca_samps$x %>% data.frame %>% dplyr::select(PC1:PC2) %>% 
             mutate('ID'=colnames(datExpr), 'lfc'=-1, PC1=scale(PC1), PC2=scale(PC2))

pca_samps_old = pcas_samps

for(lfc in lfc_list){
  
  # Recalculate DE_info with the new threshold (p-values change) an filter DE genes
  DE_genes = results(dds, lfcThreshold=lfc, altHypothesis='greaterAbs') %>% data.frame %>%
             mutate('ID'=rownames(.)) %>% filter(padj<0.05)
  
  datExpr_DE = datExpr %>% data.frame %>% filter(rownames(.) %in% DE_genes$ID)
  n_genes = c(n_genes, nrow(DE_genes))
  
  # Calculate PCAs
  datExpr_pca_samps = datExpr_DE %>% t %>% prcomp

  # Create new DF entries
  pca_samps_new = datExpr_pca_samps$x %>% data.frame %>% dplyr::select(PC1:PC2) %>% 
                  mutate('ID'=colnames(datExpr), 'lfc'=lfc, PC1=scale(PC1), PC2=scale(PC2))
  
  # Change PC sign if necessary
  if(cor(pca_samps_new$PC1, pca_samps_old$PC1)<0) pca_samps_new$PC1 = -pca_samps_new$PC1
  if(cor(pca_samps_new$PC2, pca_samps_old$PC2)<0) pca_samps_new$PC2 = -pca_samps_new$PC2
  
  pca_samps_old = pca_samps_new
  
  # Update DFs
  pcas_samps = rbind(pcas_samps, pca_samps_new)
  
}

# Add Diagnosis information
pcas_samps = pcas_samps %>% left_join(datMeta, by=c('ID'='Sample_ID')) %>%
             dplyr::select(ID, PC1, PC2, lfc, Diagnosis)

# Plot change of number of genes
ggplotly(data.frame('lfc'=lfc_list, 'n_genes'=n_genes[-1]) %>% ggplot(aes(x=lfc, y=n_genes)) + 
         geom_point() + geom_line() + theme_minimal() + xlab('Log Fold Change Magnitude') + ylab('DE Genes') +
         ggtitle('Number of Differentially Expressed genes when modifying filtering threshold'))
rm(lfc_list, n_genes, lfc, pca_samps_new, pca_samps_old, datExpr_pca_samps)



Set of differentially expressed genes


Volcano plot

library(ggrepel)

ggplotly(genes_info %>% mutate(gene_name = datGenes$hgnc_symbol) %>%
        ggplot(aes(shrunken_log2FoldChange, padj, color=significant)) + 
        geom_point(alpha=0.2, aes(id = gene_name)) + scale_y_sqrt() +
        xlab('LFC') + ylab('Adjusted p-value') + theme_minimal() + labs(color = 'DE')) #+
               #geom_label_repel(aes(label=ifelse(shrunken_log2FoldChange>0.5 | shrunken_log2FoldChange< -0.39, 
               #                                  as.character(gene_name),'')), direction = 'y', nudge_y = 0.01)


Most of the genes on this list are overexpressed in ASD and do not have a Neuronal tag

top_genes = genes_info %>% mutate(Gene = datGenes$hgnc_symbol) %>% filter(significant == TRUE) %>%
            arrange(-abs(shrunken_log2FoldChange)) %>% top_n(25, wt=abs(shrunken_log2FoldChange)) %>%
            mutate(Neuronal = as.logical(Neuronal), LFC = shrunken_log2FoldChange, n = 1:25)

kable(top_genes %>% dplyr::select(n, Gene, LFC, padj, Neuronal), caption = 'Top 25 DE genes ordered by LFC magnitude')
Top 25 DE genes ordered by LFC magnitude
n Gene LFC padj Neuronal
1 IFI6 0.7236715 0.0000439 FALSE
2 IFI44L 0.6625822 0.0000439 FALSE
3 S100A10 0.6435014 0.0167793 FALSE
4 PLEKHA4 0.6389285 0.0007250 FALSE
5 RARRES3 0.6113857 0.0000906 FALSE
6 DTX3L 0.5922509 0.0086884 FALSE
7 BST2 0.5760400 0.0009139 FALSE
8 C1R 0.5630945 0.0007250 FALSE
9 SLC4A7 0.5598158 0.0007250 FALSE
10 BTN3A2 0.5522450 0.0007250 FALSE
11 FCGBP 0.5405807 0.0285655 FALSE
12 NQO1 0.5356795 0.0113124 TRUE
13 ZNF395 0.5302268 0.0027712 FALSE
14 ISG15 0.5129203 0.0047928 FALSE
15 IFIT3 0.5116086 0.0043468 FALSE
16 OAS1 0.4756318 0.0338607 FALSE
17 PAK3 0.4686438 0.0015127 FALSE
18 IFI35 0.4660144 0.0023490 FALSE
19 IDH3A -0.4653906 0.0032818 FALSE
20 EIF4E1B -0.4628031 0.0018979 FALSE
21 TROVE2 0.4619790 0.0017383 FALSE
22 DAGLA -0.4595095 0.0084104 TRUE
23 CLN5 0.4550021 0.0118178 TRUE
24 MOV10 0.4541911 0.0059625 TRUE
25 LIN52 0.4526711 0.0043910 FALSE


PCA plot of samples

PCA plot of genes characterised by each LFC threshold’s corresponding set of DE genes.

  • The frame corresponding to LFC = -1 doesn’t correspond to any LFC, all of the genes in the dataset were used to create this plot

  • LFC thresholds from 0 to 0.2 seem to separate the samples by diagnosis better than the rest

  • The PC values for each LFC were scaled so they were easier to compare (without this scale, as the LFC increases, the points begin to cluster more and more tightly at the centre of the plot)

ggplotly(pcas_samps %>% mutate(abs_lfc=ifelse(lfc==-1,-1,lfc)) %>% 
         ggplot(aes(PC1, PC2, color=Diagnosis, shape=Diagnosis)) + 
         geom_point(aes(frame=abs_lfc, ids=ID), alpha=0.7) + coord_fixed() +
         theme_minimal() + ggtitle('PCA plot of samples modifying filtering threshold'))
 # Figure for thesis:
# p1 = pcas_samps %>% filter(lfc == 0) %>% ggplot(aes(PC1, PC2, color=Diagnosis, shape=Diagnosis)) + 
#      geom_point(alpha=0.7) + theme_minimal() + ggtitle('LFC = 0') + theme(legend.position = 'none') + coord_fixed()
# p2 = pcas_samps %>% filter(lfc == 0.05) %>% ggplot(aes(PC1, PC2, color=Diagnosis, shape=Diagnosis)) + 
#      geom_point(alpha=0.7) + theme_minimal() + ggtitle('LFC = 0.05') + theme(legend.position = 'none') + coord_fixed()
# p3 = pcas_samps %>% filter(lfc == 0.1) %>% ggplot(aes(PC1, PC2, color=Diagnosis, shape=Diagnosis)) + 
#      geom_point(alpha=0.7) + theme_minimal() + ggtitle('LFC = 0.1') + theme(legend.position = 'none') + coord_fixed()
# p4 = pcas_samps %>% filter(lfc == 0.2) %>% ggplot(aes(PC1, PC2, color=Diagnosis, shape=Diagnosis)) + 
#      geom_point(alpha=0.7) + theme_minimal() + ggtitle('LFC = 0.2') + coord_fixed()
# ggarrange(p1,p2,p3,p4, nrow=1, common.legend = TRUE, legend='bottom', widths = c(0.22, 0.25, 0.28, 0.24))


PCA plot of samples characterised by all of the genes vs characterised only by the genes found to be differentially expressed (this plot is just a side-by-side comparison of two frames from the plot above)

pca = datExpr %>% t %>% prcomp

plot_data = pca$x %>% data.frame %>% dplyr::select(PC1:PC2) %>% 
            mutate('ID'= datExpr %>% colnames) %>% 
            left_join(datMeta, by=c('ID'='Sample_ID')) %>% 
            dplyr::select('ID','PC1','PC2','Diagnosis') %>% 
            mutate('Diagnosis' = factor(Diagnosis, levels=c('CTL','ASD')))

p1 = plot_data %>% ggplot(aes(PC1, PC2, color=Diagnosis, shape=Diagnosis)) + geom_point(alpha = 0.8) +
              xlab(paste0('PC1 (',round(100*summary(pca)$importance[2,1],1),'%)')) +
              ylab(paste0('PC2 (',round(100*summary(pca)$importance[2,2],1),'%)')) +
              ggtitle('Characterising samples with all genes') +
              coord_fixed() + theme_minimal() + theme(legend.position = 'none')


pca = datExpr[genes_info$significant==TRUE,] %>% t %>% prcomp

plot_data = pca$x %>% data.frame %>% dplyr::select(PC1:PC2) %>% 
            mutate('ID'= datExpr %>% colnames) %>% 
            left_join(datMeta, by=c('ID'='Sample_ID')) %>% 
            dplyr::select('ID','PC1','PC2','Diagnosis') %>% 
            mutate('Diagnosis' = factor(Diagnosis, levels=c('CTL','ASD')))

p2 = plot_data %>% ggplot(aes(PC1, PC2, color=Diagnosis, shape=Diagnosis)) + geom_point(alpha = 0.8) +
              xlab(paste0('PC1 (',round(100*summary(pca)$importance[2,1],1),'%)')) +
              ylab(paste0('PC2 (',round(100*summary(pca)$importance[2,2],1),'%)')) +
              ggtitle('Characterising samples with DE genes') +
              coord_fixed() + theme_minimal()

ggarrange(p1,p2, nrow=1, common.legend = TRUE, legend='bottom')

rm(p1, p2, pca, plot_data)


PCA plot of genes coloured by their LFC


The second principal component is strongly related to the LFC of the genes and when visualising only the differentially expressed genes, they form two clouds of points, one with the overexpressed genes and another with the underexpressed genes.

# All genes
pca = datExpr %>% prcomp

plot_data = genes_info %>% mutate(PC1 = pca$x[,1], PC2 = pca$x[,2])

pos_zero = -min(plot_data$shrunken_log2FoldChange)/(max(plot_data$shrunken_log2FoldChange)-min(plot_data$shrunken_log2FoldChange))
p1 = plot_data %>% ggplot(aes(PC1, PC2, color=shrunken_log2FoldChange)) + geom_point(alpha=0.5) +
    scale_color_gradientn(colours=c('#F8766D','#faa49e','white','#00BFC4','#009499'), 
                          values=c(0, pos_zero-0.15, pos_zero, pos_zero+0.15, 1)) +
    xlab(paste0('PC1 (',round(100*summary(pca)$importance[2,1],1),'%)')) +
    ylab(paste0('PC2 (',round(100*summary(pca)$importance[2,2],1),'%)')) + 
    theme_minimal() + ggtitle('PCA of all genes') + 
    theme(legend.position = 'bottom', legend.key.width=unit(2,'cm')) + labs(color = 'LFC ')

# Only DE genes
pca = datExpr[genes_info$significant,] %>% prcomp

plot_data = genes_info %>% dplyr::filter(significant == TRUE) %>% mutate(PC1 = pca$x[,1], PC2 = pca$x[,2])

pos_zero = -min(plot_data$shrunken_log2FoldChange)/(max(plot_data$shrunken_log2FoldChange)-min(plot_data$shrunken_log2FoldChange))
p2 = plot_data %>% ggplot(aes(PC1, PC2, color=shrunken_log2FoldChange)) + geom_point(alpha=0.5) +
    scale_color_gradientn(colours=c('#F8766D','#faa49e','white','#00BFC4','#009499'), 
                          values=c(0, pos_zero-0.15, pos_zero, pos_zero+0.15, 1)) +
    xlab(paste0('PC1 (',round(100*summary(pca)$importance[2,1],1),'%)')) +
    ylab(paste0('PC2 (',round(100*summary(pca)$importance[2,2],1),'%)')) + 
    theme_minimal() + ggtitle('PCA of differentially expressed genes') + 
    theme(legend.position = 'bottom', legend.key.width=unit(2,'cm')) + labs(color = 'LFC ')

ggarrange(p1,p2, nrow=1, common.legend = TRUE, legend='bottom')

rm(pca, plot_data, pos_zero, p1, p2)


Level of expression comparison between over and underexpressed genes


The difference in mean level of expression between over and underexpressed genes is statistically significant with a p=value lower than \(10^{-4}\)

plot_data = data.frame('MeanExpr' = rowMeans(datExpr), 
                       'Group' = ifelse(genes_info$shrunken_log2FoldChange>0,'overexpressed','underexpressed')) %>%
            mutate(Group = factor(Group, levels = c('underexpressed','overexpressed')))

plot_data[genes_info$significant==TRUE,] %>% ggplot(aes(Group, MeanExpr, fill = Group)) + 
              geom_boxplot(outlier.colour='gray', outlier.shape='o', outlier.size=3) + 
              stat_compare_means(label = 'p.signif', method = 't.test', method.args = list(var.equal = FALSE)) + 
              ylab('Mean Expression') + xlab('') + theme(legend.position = 'none') + 
              theme_minimal() + theme(legend.position = 'none')



Session info

sessionInfo()
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.5 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
## 
## locale:
##  [1] LC_CTYPE=en_GB.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_GB.UTF-8        LC_COLLATE=en_GB.UTF-8    
##  [5] LC_MONETARY=en_GB.UTF-8    LC_MESSAGES=en_GB.UTF-8   
##  [7] LC_PAPER=en_GB.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] ggrepel_0.8.2               knitr_1.32                 
##  [3] biomaRt_2.40.5              DESeq2_1.24.0              
##  [5] SummarizedExperiment_1.14.1 DelayedArray_0.10.0        
##  [7] BiocParallel_1.18.1         matrixStats_0.56.0         
##  [9] Biobase_2.44.0              GenomicRanges_1.36.1       
## [11] GenomeInfoDb_1.20.0         IRanges_2.18.3             
## [13] S4Vectors_0.22.1            BiocGenerics_0.30.0        
## [15] ClusterR_1.2.1              gtools_3.8.2               
## [17] expss_0.10.2                rstatix_0.6.0              
## [19] Rtsne_0.15                  ggpubr_0.2.5               
## [21] magrittr_2.0.1              GGally_1.5.0               
## [23] gridExtra_2.3               viridis_0.5.1              
## [25] viridisLite_0.4.0           RColorBrewer_1.1-2         
## [27] plotlyutils_0.0.0.9000      plotly_4.9.2               
## [29] glue_1.4.2                  reshape2_1.4.4             
## [31] forcats_0.5.0               stringr_1.4.0              
## [33] dplyr_1.0.1                 purrr_0.3.4                
## [35] readr_1.3.1                 tidyr_1.1.0                
## [37] tibble_3.1.2                ggplot2_3.3.5              
## [39] tidyverse_1.3.0            
## 
## loaded via a namespace (and not attached):
##   [1] readxl_1.3.1           backports_1.1.8        Hmisc_4.4-0           
##   [4] plyr_1.8.6             lazyeval_0.2.2         splines_3.6.3         
##   [7] gmp_0.6-1              crosstalk_1.1.0.1      digest_0.6.27         
##  [10] htmltools_0.5.1.1      fansi_0.5.0            checkmate_2.0.0       
##  [13] memoise_1.1.0          cluster_2.1.0          openxlsx_4.1.4        
##  [16] annotate_1.62.0        modelr_0.1.6           prettyunits_1.1.1     
##  [19] jpeg_0.1-8.1           colorspace_2.0-2       blob_1.2.1            
##  [22] rvest_0.3.5            haven_2.2.0            xfun_0.22             
##  [25] crayon_1.4.1           RCurl_1.98-1.2         jsonlite_1.7.2        
##  [28] genefilter_1.66.0      survival_3.2-7         gtable_0.3.0          
##  [31] zlibbioc_1.30.0        XVector_0.24.0         car_3.0-7             
##  [34] abind_1.4-5            scales_1.1.1           DBI_1.1.0             
##  [37] Rcpp_1.0.6             progress_1.2.2         xtable_1.8-4          
##  [40] htmlTable_1.13.3       foreign_0.8-76         bit_4.0.4             
##  [43] Formula_1.2-3          htmlwidgets_1.5.1      httr_1.4.2            
##  [46] acepack_1.4.1          ellipsis_0.3.2         farver_2.1.0          
##  [49] pkgconfig_2.0.3        reshape_0.8.8          XML_3.99-0.3          
##  [52] nnet_7.3-14            sass_0.4.0             dbplyr_1.4.2          
##  [55] locfit_1.5-9.4         utf8_1.2.1             labeling_0.4.2        
##  [58] tidyselect_1.1.0       rlang_0.4.11           AnnotationDbi_1.46.1  
##  [61] munsell_0.5.0          cellranger_1.1.0       tools_3.6.3           
##  [64] cli_2.5.0              generics_0.0.2         RSQLite_2.2.0         
##  [67] broom_0.7.0            evaluate_0.14          yaml_2.2.1            
##  [70] bit64_4.0.5            fs_1.5.0               zip_2.0.4             
##  [73] xml2_1.2.5             compiler_3.6.3         rstudioapi_0.11       
##  [76] curl_4.3               png_0.1-7              ggsignif_0.6.0        
##  [79] reprex_0.3.0           geneplotter_1.62.0     bslib_0.2.5.1         
##  [82] stringi_1.5.3          highr_0.8              lattice_0.20-41       
##  [85] Matrix_1.2-18          vctrs_0.3.8            pillar_1.6.1          
##  [88] lifecycle_1.0.0        jquerylib_0.1.4        cowplot_1.1.1         
##  [91] data.table_1.12.8      bitops_1.0-6           R6_2.5.0              
##  [94] latticeExtra_0.6-29    rio_0.5.16             assertthat_0.2.1      
##  [97] withr_2.4.2            GenomeInfoDbData_1.2.1 hms_0.5.3             
## [100] grid_3.6.3             rpart_4.1-15           rmarkdown_2.7         
## [103] carData_3.0-3          lubridate_1.7.4        base64enc_0.1-3